#
# createSocialItemsOnlyData.R
#
# Create a subset of the ANES cumulative data with
# social policy items only.
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

# Input: the ANES cumulative file as modified by create2012Data.R and
# mergeCrossSectionalItems.R. The rest of this script uses anes.cdf.with.2012,
# additional.2012.items and extra.vars.50s.60s, which are presumably restored
# by this load (none of them is created in this script) -- verify against the
# scripts named above.
load(file = "anes_cdf_extra_50s60s.RData")

# Social policy items, written as  short label = ANES variable code.
# The codes were picked by reading the listing in the "combined" data.frame
# created in makeCumulativeMap.R.
social.var.numbers <- c(
  "school prayer 1"   = "VCF9043",
  "school prayer 2"   = "VCF9051",
  "rights of accused" = "VCF0832",
  "equal rights amnd" = "VCF0833",
  "women equal role"  = "VCF0834",
  "abortion"          = "VCF0837",
  "abortion 2"        = "VCF0838",
  "gay discrim"       = "VCF0876a",
  "gays in military"  = "VCF0877a",
  "gays adopt"        = "VCF0878"
)

# Non-item columns that are kept alongside the social items, same
# label = variable code layout.
additional.vars.to.keep <- c(
  "year"       = "VCF0004",
  "respondent" = "VCF0006",
  "party"      = "VCF0301",
  "weight"     = "VCF0009a"
)

# Reduce the data to the social items plus the bookkeeping columns.
anes.cdf.with.2012 <- anes.cdf.with.2012[
  , c(social.var.numbers, additional.vars.to.keep)
]

# A respondent has no social-item information when every one of the social
# item columns is NA, i.e. the per-row NA count equals the number of items.
no.social <- rowSums(is.na(anes.cdf.with.2012[, social.var.numbers])) ==
  length(social.var.numbers)

# Report how many rows are about to be removed, overall and by year (VCF0004).
cat("Dropping", sum(no.social), "cases without observation on any social items...
 number dropped by year:\n")
print(table(anes.cdf.with.2012[["VCF0004"]], no.social))

# Drop those rows and write the reduced data, together with the item lists,
# to a new .RData file.
anes.cdf.with.2012 <- anes.cdf.with.2012[!no.social, ]
save(
  list = c("anes.cdf.with.2012", "additional.2012.items",
           "extra.vars.50s.60s", "social.var.numbers"),
  file = "anes_cdf_extra_50s60s_social_only.RData"
)

#
# Plot item coverage by year.
#
library(data.table)
DT <- data.table(anes.cdf.with.2012)

# For every year (VCF0004), count the non-missing responses to each social
# item. NOTE: despite its name, num.miss holds counts of NON-missing
# observations; the name is kept so any code relying on it keeps working.
num.miss <- DT[, lapply(.SD, function(x) sum(!is.na(x))),
               .SDcols = social.var.numbers, by = "VCF0004"]

# Print the table that the header announces (previously the header was
# written but nothing followed it).
cat("Number of non-missing observations by year:\n")
print(num.miss)

# Item columns in reverse alphabetical order, excluding the grouping column.
# Item i is drawn at height i, so the alphabetically first item ends up at
# the top of the plot.
items <- sort(names(num.miss), decreasing = TRUE)
items <- items[items != "VCF0004"]
years <- num.miss[["VCF0004"]]

# Widen the left margin so the variable codes fit as axis labels; the
# previous settings are restored once the plot is drawn.
par.old <- par(mar = c(4.1, 6.1, 1.1, 1.1))

# Empty canvas: x spans the observed years, y has one row per item.
plot(x = range(years), y = c(1, length(items)),
     type = "n", ann = FALSE, axes = FALSE)
axis(1, at = seq(min(years), max(years), by = 2))
axis(2, las = 2, at = seq_along(items), labels = items,
     cex.axis = .9, tick = FALSE)
abline(h = seq_along(items), col = "gray")

# One dot per (item, year) combination with at least one non-missing response.
for (i in seq_along(items)) {
  flag <- num.miss[[items[i]]] != 0
  points(x = years[flag], y = rep(i, sum(flag)), pch = 19)
}
par(par.old)
